Purpose

This document will explore the distribution of MLGs across years as well as filtering strategies.

Required packages and data

library(PramCurry)
## Loading required package: poppr
## Loading required package: adegenet
## Loading required package: ade4
##    ==========================
##     adegenet 1.4-2 is loaded
##    ==========================
## 
##  - to start, type '?adegenet'
##  - to browse adegenet website, type 'adegenetWeb()'
##  - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
## 
## 
## This is poppr version 1.1.2.99.70. To get started, type package?poppr
library(reshape2)
library(ggplot2)
library(ape)
library(poppr)
library(adegenet)
options(stringsAsFactors = FALSE)
data(ramdat)
data(pop_data)
data(myPal)
devtools::source_gist(2877821)
## Sourcing https://gist.githubusercontent.com/baptiste/2877821/raw/d0e2fc28b88f912543b9d6a2a6bf799fd822524c/set_panel_size.r
## SHA-1 hash of file is 791923a0cf8a28d5d60479bdd8fb08808d11c467
## Loading required package: grid
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] ape_3.1-4.5       ggplot2_1.0.0     reshape2_1.4      PramCurry_1.0    
## [5] poppr_1.1.2.99-70 adegenet_1.4-2    ade4_1.6-2       
## 
## loaded via a namespace (and not attached):
##  [1] assertthat_0.1      bitops_1.0-6        boot_1.3-11        
##  [4] caTools_1.17.1      colorspace_1.2-4    devtools_1.6       
##  [7] digest_0.6.4        dplyr_0.2           evaluate_0.5.5     
## [10] fastmatch_1.0-4     formatR_1.0         ggmap_2.3          
## [13] gtable_0.1.2        htmltools_0.2.6     httpuv_1.3.0       
## [16] httr_0.5            igraph_0.7.1        jsonlite_0.9.11    
## [19] knitr_1.6           lattice_0.20-29     mapproj_1.2-2      
## [22] maps_2.3-9          MASS_7.3-34         Matrix_1.1-4       
## [25] munsell_0.4.2       nlme_3.1-117        parallel_3.1.2     
## [28] pegas_0.6           permute_0.8-3       phangorn_1.99-7    
## [31] plyr_1.8.1          png_0.1-7           proto_0.3-10       
## [34] Rcpp_0.11.2         RCurl_1.95-4.3      RgoogleMaps_1.2.0.6
## [37] rjson_0.2.14        RJSONIO_1.3-0       rmarkdown_0.3.10   
## [40] scales_0.2.4        shiny_0.10.1        stringr_0.6.2      
## [43] tools_3.1.2         vegan_2.0-10        xtable_1.7-4       
## [46] yaml_2.1.13

Analysis of unfiltered data

Basic Barplots

First thing to do is to look at the distribution of the MLGs per year.

ramdat@other$oldmlg <- mlg.vector(ramdat)
(unfiltered <- mlg.table(ramdat, total = TRUE))
## | 2001

plot of chunk unnamed-chunk-2

## | 2002

plot of chunk unnamed-chunk-2

## | 2003

plot of chunk unnamed-chunk-2

## | 2004

plot of chunk unnamed-chunk-2

## | 2005

plot of chunk unnamed-chunk-2

## | 2006 
## | 2010 
## | 2011

plot of chunk unnamed-chunk-2

## | 2012

plot of chunk unnamed-chunk-2

## | 2013

plot of chunk unnamed-chunk-2

## | 2014

plot of chunk unnamed-chunk-2

## | Total

plot of chunk unnamed-chunk-2

##       MLG.1 MLG.2 MLG.3 MLG.4 MLG.5 MLG.6 MLG.7 MLG.8 MLG.9 MLG.10 MLG.11
## 2001      0     0     0     1     0     0     0     0     0      0      0
## 2002      0     0     1     1     0     0     0     0     0      0      0
## 2003      0     0     0     1     0     0     0     0     0      0      0
## 2004      0     0     0     0     0     0     0     0     0      0      0
## 2005      0     0     0     0     0     0     0     0     0      0      0
## 2006      0     0     0     0     0     0     0     0     0      0      0
## 2010      0     0     0     0     0     0     0     0     0      0      0
## 2011      0     0     0     0     0     0     0     0     0      0      0
## 2012      0     0     0     0     0     0     0     0     0      0      0
## 2013      1     1     0     2     3     1     1     0     1      1      1
## 2014      0     0     0     0     0     0     0     1     0      0      0
## Total     1     1     1     5     3     1     1     1     1      1      1
##       MLG.12 MLG.13 MLG.14 MLG.15 MLG.16 MLG.17 MLG.18 MLG.19 MLG.20
## 2001       0      0      0      0      1      0      0      1      1
## 2002       0      0      0      0      0      2      0      0      0
## 2003       0      0      3      0      1      4      0      0      1
## 2004       0      0      0      0      0      1      0      0      0
## 2005       0      0      0      0      0      0      0      0      0
## 2006       0      0      0      0      0      0      0      0      0
## 2010       0      0      0      0      0      0      0      0      0
## 2011       0      0      0      0      0      0      0      0      0
## 2012       0      0      2      0      0      1      5      0      0
## 2013       5      1     33      3      1      9      3      0      1
## 2014       1      0      5      2      0      0      0      0      0
## Total      6      1     43      5      3     17      8      1      3
##       MLG.21 MLG.22 MLG.23 MLG.24 MLG.25 MLG.26 MLG.27 MLG.28 MLG.29
## 2001       3     28      0      1      0      1      1      0      0
## 2002       0     23      2      0      0      0      0      3      2
## 2003       5     58      1      1      0      0      2      4      3
## 2004       0     16      0      0      0      0      0      4      0
## 2005       0      0      0      0      0      0      0      0      0
## 2006       0      0      0      0      0      0      0      0      0
## 2010       0      0      0      0      0      0      0      0      0
## 2011       0      1      0      0      0      0      0      0      0
## 2012       1      0      8      1      0      0      0      0      0
## 2013       2     21     12      4      1      0      1      8      5
## 2014       0      2      0      0      0      0      0      2      1
## Total     11    149     23      7      1      1      4     21     11
##       MLG.30 MLG.31 MLG.32 MLG.33 MLG.34 MLG.35 MLG.36 MLG.37 MLG.38
## 2001       0      0      0      0      0      0      0      0      0
## 2002       0      0      0      0      0      0      0      0      0
## 2003       0      0      0      0      0      0      0      0      0
## 2004       0      0      0      0      0      0      0      0      0
## 2005       1      0      0      0      0      0      0      0      0
## 2006       0      0      0      0      0      0      0      0      0
## 2010       0      0      0      1      0      0      0      0      0
## 2011       0      0      5      0      0      0      0      0      0
## 2012       0      0      0      0      0      0      0      0      0
## 2013       1      1      0      0      0      1      2      1      3
## 2014       0      0      0      2      1      0      0      0      0
## Total      2      1      5      3      1      1      2      1      3
##       MLG.39 MLG.40 MLG.41 MLG.42 MLG.43 MLG.44 MLG.45 MLG.46 MLG.47
## 2001       0      0      1      0      0      0      0      0      0
## 2002       0      0      0      0      0      0      0      0      0
## 2003       0      0      0      0      0      0      0      0      0
## 2004       0      0      0      0      0      0      0      0      0
## 2005       0      0      0      0      0      0      0      0      0
## 2006       0      0      0      0      0      0      0      1      0
## 2010       0      0      0      0      0      0      0      0      0
## 2011       0      0      0      0      0      0      0      0      0
## 2012       0      0      0      0      0      0      0      0      0
## 2013       0      1      1      1      1      1      0      3      1
## 2014       1      0      0      0      0      0      2      1      0
## Total      1      1      2      1      1      1      2      5      1
##       MLG.48 MLG.49 MLG.50 MLG.51 MLG.52 MLG.53 MLG.54 MLG.55 MLG.56
## 2001       0      0      0      0      0      0      0      0      0
## 2002       0      0      0      0      0      0      0      0      0
## 2003       0      0      0      2      0      0      0      0      0
## 2004       0      0      0      0      2      0      0      0      0
## 2005       0      0      0      0      0      0      0      0      0
## 2006       0      0      0      0      0      0      0      0      0
## 2010       0      0      0      0      0      0      0      0      0
## 2011       0      0      0      0      1      0      0      1      0
## 2012       0      0      0      0      0      2      0      0      0
## 2013       3      1      0      0      6     13     12      0      1
## 2014       0      0      1      0      4      2      1      0      0
## Total      3      1      1      2     13     17     13      1      1
##       MLG.57 MLG.58 MLG.59 MLG.60 MLG.61 MLG.62 MLG.63 MLG.64 MLG.65
## 2001       0      0      0      0      0      0      0      0      0
## 2002       0      0      0      0      0      0      0      0      0
## 2003       0      0      0      1      0      0      0      1      1
## 2004       0      0      0      0      0      0      0      0      0
## 2005       0      0      0      0      0      0      0      0      0
## 2006       0      0      0      0      0      0      0      0      0
## 2010       0      0      0      0      0      0      0      0      0
## 2011       0      1     59      0      0      0      0      0      0
## 2012       0      0      0      0      0      0      0      0      0
## 2013       1      0      0      0      1      1      0      0      0
## 2014       0      0      0      0      0      0      1      0      0
## Total      1      1     59      1      1      1      1      1      1
##       MLG.66 MLG.67 MLG.68 MLG.69 MLG.70
## 2001       0      0      0      0      0
## 2002       0      0      1      1      0
## 2003       0      1     10      1      1
## 2004       1      1      8      0      2
## 2005       0      0      1      0      0
## 2006       0      0      0      0      0
## 2010       0      0      0      0      0
## 2011       0      0      0      0      0
## 2012       0      0      0      0      0
## 2013       0      0      0      0      0
## 2014       0      0      0      0      0
## Total      1      2     20      2      3
totplot <- last_plot()

Heatmaps

options(digits = 10)
(newReps <- other(ramdat)$REPLEN)
##  PrMS6A1  Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1 
##        3        2        2        4        4
newReps[3] <- 4
(newReps <- fix_replen(ramdat, newReps))
##  PrMS6A1  Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1 
##  3.00000  2.00000  4.00001  4.00000  4.00000
mlg.heatmap(unfiltered)

plot of chunk unnamed-chunk-3

mlg.barplot(unfiltered, pal = myPal)

plot of chunk unnamed-chunk-3

uf <- unfiltered[-nrow(unfiltered), ]
mlg.barplot(uf[rowSums(uf) > 10, ], total = FALSE, pal = myPal)

plot of chunk unnamed-chunk-3

setpop(ramdat) <- ~ZONE1
zones <- mlg.table(ramdat, bar = FALSE)
# mlg.heatmap(zones)
mlg.barplot(zones, pal = myPal)

plot of chunk unnamed-chunk-5

setpop(ramdat) <- ~Pop
(bars <- ggplot(totplot$data, aes(x = MLG, y = count, fill = MLG)) + 
  geom_bar(stat = "identity") +
  theme_classic() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 160)) +
  scale_fill_manual(values = myPal) +
  guides(fill = guide_legend(ncol = 2, title = NULL, )) +
  geom_text(aes(label = count), size = 2.5, hjust = 0, font = "bold") +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
        #element_text(angle = -45, vjust = 1, hjust = 0, size = 5),
        #legend.position = c(1, 0), legend.direction = "vertical",
        legend.position = "none",
        legend.text = element_text(size = rel(0.65)),
        legend.justification = c(1, 0),
        legend.background = element_rect(color = "black"),
        legend.margin = grid::unit(0, "pt"),
        legend.key.size = grid::unit(15, "pt"),
        text = element_text(family = "Helvetica"),
        axis.title.y = element_blank()) + 
  scale_x_discrete(limits = rev(totplot$data$MLG)) +
  coord_flip())

plot of chunk unnamed-chunk-6

# ggsave("mlg_dist.svg", plot = set_panel_size(bars, 
#                                              width = unit(3, "in"),
#                                              height = unit(12, "in")),
#        width = 6, height = 15)


# ggsave("mlg_dist_ppt.svg", 
#        plot = set_panel_size(bars, width = unit(30*1.2, "mm"), 
#                              height = unit(225*1.2, "mm")),
#        width = 88,
#        height = 250, 
#        units = "mm", 
#        family = "Helvetica", 
#        scale = 1.2,
#        pointsize = 12)

Get a sense of when these arose:

ramdat.cc <- clonecorrect(ramdat, hier = NA)

rangers <- mlg.crosspop(ramdat, mlgsub = unique(ramdat@mlg), df = TRUE, quiet = TRUE)
names(rangers)[2] <- "Year"
rangers$MLG <- factor(rangers$MLG, levels = colnames(unfiltered))

(ranges <- ggplot(rangers, aes(x = Year, y = MLG, group = MLG)) + 
  geom_line(aes(color = MLG), size = 1, linetype = 1) + 
  geom_point(aes(color = MLG), size = 5, pch = 21, fill = "white") +
  geom_text(aes(label = Count), size = 2.5) + 
  scale_color_manual(values = myPal) + 
  guides(color = guide_legend(ncol = 3)) +
  ylab("Multilocus Genotype") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        text = element_text(family = "Helvetica"),
        legend.position = "none",
        axis.line = element_line(colour = "black")) +
#   theme(panel.grid.major.y = element_line(size = 0.5, color = "grey")) +
#   theme(panel.grid.major.x = element_line(size = 1, color = "grey")) + 
  scale_y_discrete(limits = rev(totplot$data$MLG)))

plot of chunk unnamed-chunk-7

# ggsave("mlg_range.svg", plot = set_panel_size(bars, 
#                                               width = unit(5, "in"),
#                                               height = unit(12, "in")),
#        width = 6, height = 15)


# ggsave("mlg_range_ppt.svg", 
#        plot = set_panel_size(ranges, width = unit(44*1.2, "mm"),
#                              height = unit(225*1.2, "mm")),
#        width = 88, height = 250, units = "mm", family = "Helvetica", 
#        pointsize = 12, scale = 1.2)

pamat     <- unfiltered > 0
pamat     <- pamat[-nrow(pamat), ]
timetable <- MLG_range(pamat)

origin.df <- data.frame(timetable[paste("MLG", ramdat.cc@mlg, sep = "."), ])
origin.df$MLG <- rownames(origin.df)
counts <- unfiltered["Total", origin.df$MLG]
origin.df$newMLG <- paste(origin.df$first, pad_zeroes(1:nrow(origin.df)), sep = ".")

Trees

Now that we have this information, obtaining a bootstrapped dendrogram would be good. Bruvo’s distance would be a good choice since this is a clonal epidemic and we would expect mutations in a stepwise fashion.

set.seed(5555)
system.time(njtree <- bruvo.boot(ramdat, replen = newReps,#other(ramdat)$REPLEN, 
                                 sample = 100, quiet = TRUE, showtree = FALSE,
                                 tree = "nj")
            )
## Warning: Some branch lengths of the tree are negative. Normalizing
## branches according to Kuhner and Felsenstein (1994)
##    user  system elapsed 
## 227.029   2.236 233.699
njtree$tip.label <- paste("MLG", ramdat@mlg, sep = ".")
njtree <- ladderize(njtree)
plot.phylo(njtree, cex = 0.8, font = 2, adj = 0, xpd = TRUE, 
           label.offset = 1/800, tip.col = myPal[ramdat@mlg])
nodelabels(ifelse(njtree$node.label > 50, njtree$node.label, NA), 
           adj = c(1.3, -0.5), frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)

plot of chunk unnamed-chunk-8

set.seed(5001)
system.time(njtree.cc <- bruvo.boot(ramdat.cc, replen = newReps,#other(ramdat)$REPLEN, 
                                 sample = 1000, quiet = TRUE, showtree = FALSE, 
                                 tree = "nj")
            )
##    user  system elapsed 
##  56.917   2.678  62.375
njtree.cc$tip.label <- paste(origin.df$newMLG, origin.df$MLG, sep = "_")
names(myPal) <- njtree.cc$tip.label[order(mlgFromString(origin.df$MLG))]
yearPal <- deepseasun(nrow(pamat))
names(yearPal) <- rownames(pamat)
njtree.cc <- ladderize(njtree.cc)
par(mfrow = c(1, 2))
plot.phylo(njtree.cc, cex = 0.8, font = 2, xpd = TRUE,  adj = 0,
           tip.col = yearPal[substr(njtree.cc$tip.label, 1, 4)])
nodelabels(ifelse(njtree.cc$node.label > 50, njtree.cc$node.label, NA), 
           adj = c(1.3, -0.5), 
           frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)

legend("bottomright", fill = yearPal, legend = names(yearPal), bty = "n", 
       title = "Year first\nisolated",
       cex = 0.75,)


plot.phylo(njtree.cc, cex = 0.8, font = 2, xpd = TRUE,  adj = 0,
           tip.col = myPal[njtree.cc$tip.label])
nodelabels(ifelse(njtree.cc$node.label > 50, njtree.cc$node.label, NA), 
           adj = c(1.3, -0.5), 
           frame = "n", 
           cex = 0.8, font = 3, xpd = TRUE)
add.scale.bar(lwd = 5)

plot of chunk unnamed-chunk-9

par(mfrow = c(1, 1))
njtree.cc$tip.label <- origin.df$MLG
names(myPal) <- origin.df$MLG[order(mlgFromString(origin.df$MLG))]
assoc <- matrix(c(njtree$tip.label, njtree$tip.label), ncol = 2)
cophyloplot(njtree, njtree.cc, assoc, use.edge.length = FALSE, space = 1000,
            show.tip.label = FALSE, 
            col = myPal[assoc[, 1]], lwd = 3, gap = 1)

plot of chunk cophylo